# -*- coding: utf-8 -*-
import scrapy


class XicidailiSpider(scrapy.Spider):
    """Scrape free proxy entries (ip, port, scheme, alive time) from
    xicidaili.com, following the "next page" pagination link."""

    name = "xicidaili"
    allowed_domains = ["xicidaili.com"]
    start_urls = (
        'http://www.xicidaili.com/nn/',
    )

    def parse(self, response):
        """Parse one listing page.

        Yields one dict per proxy-table row, then a single Request for
        the next page when a pagination link exists.

        Fixes vs. the original:
        - ``while len(next_url) > 0`` never updated ``next_url``, so it
          yielded the same Request forever; replaced with ``if``.
        - ``len(None)`` crashed on the last page (no next link); the
          truthiness check handles both ``None`` and ``""``.
        """
        # [1:] skips the table's header row.
        for tr in response.xpath("//table[@id='ip_list']//tr")[1:]:
            yield {
                'ip': tr.xpath("./td[2]/text()").extract_first(),
                'port': tr.xpath("./td[3]/text()").extract_first(),
                'http_or_https': tr.xpath("./td[6]/text()").extract_first(),
                'alive_time': tr.xpath("./td[9]/text()").extract_first(),
            }

        next_url = response.xpath("//a[@class='next_page']/@href").extract_first()
        if next_url:
            # urljoin resolves the relative href against the current page's
            # URL, which is safer than hard-coding the scheme and host.
            yield scrapy.Request(response.urljoin(next_url), callback=self.parse)
